/**************************************************************************
	Replication do-file: "Corruption in Customs"
	Cyril Chalendard, Ana Fernandes, Gael Raballand and Bob Rijkers
	
	Created on: 01/07/2022
**************************************************************************/

* Start from a clean session: empty memory, no paging of output, no open
* log, and a cleared Results window.
clear all
set more off, perm
cap log close
cls

* Extra commands: remove the asterisk to install
*net install cleanplots, from("https://tdmize.github.io/data/cleanplots")
*set scheme cleanplots, perm

* ----------------------- DIRECTORIES AND FOLDERS ----------------------- *

* Root directory of the replication package (edit this line before running)
global main "PUT YOUR DIRECTORY PATH HERE"
* Work from the root directory. The root is stored in the global named main;
* no global named folder is defined anywhere in this file.
cd "$main"

* Folders
* Forward slashes are used as the path separator because Stata accepts them
* on every platform, whereas backslashes are only understood on Windows.
global outputdata "$main/Output Data"
global figures "$main/Figures"
* Create the figures folder; capture ignores the error raised when it already exists
cap mkdir "$figures"

* ----------------------------- BEGINS HERE ----------------------------- *

* -------
* Dataset
* -------
* Load the analysis dataset from the output-data folder, replacing whatever
* is currently in memory. A forward slash is used as the separator so the
* path also resolves outside Windows.
use "$outputdata/CFRR.dta", clear

* New variables
** Group brokers: numeric group identifier built from key_dec
egen nb = group(key_dec)

** Indicators of revenue loss
/* Every loss has the form tot_taxes_usd*exp(b*eis_f) - tot_taxes_usd, where b
   is a coefficient from panel B of Table A22. The two lists below are
   parallel: the k-th variable suffix is paired with the k-th coefficient. */
local loss_sfx i iw e ew vt
local loss_b 0.732 1.112 1.659 2.085 0.851
local n_loss : word count `loss_sfx'

*** Revenue losses calculated using all declarations
*** (defined only in the regression sample with non-missing eis_f)
forvalues k = 1/`n_loss' {
	local sfx : word `k' of `loss_sfx'
	local b : word `k' of `loss_b'
	gen l`sfx' = tot_taxes_usd*exp(`b'*eis_f) - tot_taxes_usd if sample_reg==1 & eis_f!=.
}

*** Revenue losses calculated using declarations with significant excess interaction
*** (same formula when sig_c_f==1, zero for the other in-sample declarations,
*** missing outside the sample)
forvalues k = 1/`n_loss' {
	local sfx : word `k' of `loss_sfx'
	local b : word `k' of `loss_b'
	gen sl`sfx' = cond(sig_c_f==1, tot_taxes_usd*exp(`b'*eis_f) - tot_taxes_usd, 0) if sample_reg==1 & eis_f!=.
}

*** After delegated randomization - each of the numbers entering the formulas is a coefficient from Table A23
*** (same zero/missing convention as above, on the sample_int sample with non-missing wfr and eis_f)
local post_sample sample_int==1 & wfr!=. & eis_f!=.
gen post_sli = cond(sig_c_f==1, exp(0.146*wfr + 0.043*eis_f + 1.279*wfr_eis_f)*tot_taxes_usd - tot_taxes_usd, 0) if `post_sample'
gen post_sle = cond(sig_c_f==1, exp(0.267*wfr + 0.021*eis_f + 3.328*wfr_eis_f)*tot_taxes_usd - tot_taxes_usd, 0) if `post_sample'
gen post_slvt = cond(sig_c_f==1, exp(0.024*wfr + 0.184*eis_f + 0.479*wfr_eis_f)*tot_taxes_usd - tot_taxes_usd, 0) if `post_sample'
label var post_sli "Extra tax yield no corruption - price adjusted (internal) wfr"
label var post_sle "Extra tax yield no corruption - price adjusted (external) wfr"
label var post_slvt "Extra tax yield no corruption - valitrade price wfr"
*** Revenue loss by inspector and semester
/* For each revenue measure X, restricted to the regression sample with
   non-missing eis_f, build:
     t_is_X , t_bs_X : totals by is (inspector-semester) and bs (broker-semester)
     m_is_X , m_bs_X : means by is and by bs
     st_X            : total by nc_sem
     p_is_X , p_bs_X : t_is_X and t_bs_X divided by st_X
   The measure is divided by 1000 while the aggregates are computed and
   multiplied by 1000 again afterwards.
   NOTE(review): the rescaling is written back into the variable itself, so
   the round trip may not restore the stored values exactly - confirm this
   is acceptable. */
local insample sample_reg==1 & eis_f!=.
foreach v of varlist tot_taxes_usd li liw le lew lvt sli sliw sle slew slvt {
	replace `v' = `v'/1000

	* Totals for each inspector-semester and broker-semester pair
	foreach g in is bs {
		egen t_`g'_`v' = sum(`v') if `insample' , by(`g')
	}

	* Averages
	/* The group mean goes through a temporary helper and is then spread with
	   max(), so that the variable is always defined even when significant
	   excess interaction is 0. */
	foreach g in is bs {
		tempvar grp_mean
		egen `grp_mean' = mean(`v') if `insample' , by(`g')
		egen m_`g'_`v' = max(`grp_mean') if `insample' , by(`g')
		drop `grp_mean'
	}

	* Shares: semester total first, then each pair's total relative to it
	egen st_`v' = sum(`v') if `insample' , by(nc_sem)
	foreach g in is bs {
		gen p_`g'_`v' = t_`g'_`v'/st_`v'
	}

	replace `v' = `v'*1000
}
** Inspection share by different types of declarations
/* Each share is the count of flagged declarations within is divided by the
   count within nc_sem, both taken over the regression sample with
   non-missing eis_f. The three variants differ only in which declarations
   are flagged by the counter n:
     all : every declaration                        -> i_ms
     hs  : e20==1 with non-missing htl_e, eis_f     -> i_ms_hs (high stakes)
     ls  : emin20==1 with non-missing htl_e, eis_f  -> i_ms_ls (low stakes) */
cap drop n
foreach kind in all hs ls {
	if "`kind'" == "all" {
		*** Inspector market share
		local target i_ms
		gen n = 1
	}
	else if "`kind'" == "hs" {
		*** Market share - high stakes
		local target i_ms_hs
		gen n = 1 if e20==1 & htl_e!=. & eis_f!=.
	}
	else {
		*** Market share - low stakes
		local target i_ms_ls
		gen n = 1 if emin20==1 & htl_e!=. & eis_f!=.
	}
	egen N = sum(n) if sample_reg==1 & eis_f!=., by(nc_sem)
	egen Ni = sum(n) if sample_reg==1 & eis_f!=., by(is)
	gen `target' = Ni/N
	drop N Ni n
}
** Averages
/* All averages are taken by is over declarations in the regression sample
   with non-missing tot_taxes_usd and htl_e. Yields are averaged in thousands
   (tp1000) and then multiplied by 1000 for the a-prefixed variables. */
local yield_sample tot_taxes_usd!=. & htl_e!=. & sample_reg==1
gen tp1000= tot_taxes_usd/1000

foreach flag in e20 emin20 {
	* e20 is the high-stakes indicator, emin20 the low-stakes one
	if "`flag'" == "e20" {
		local share M_D20_E
		local yield E20Mtp
	}
	else {
		local share M_Dmin20_E
		local yield EM20Mtp
	}
	*** Share of declarations carrying the flag
	egen `share' = mean(`flag') if `yield_sample' , by(is)
	*** Tax yield per declaration among flagged declarations
	egen `yield' = mean(tp1000) if `flag'==1 & `yield_sample' , by(is)
	gen a`yield' = 1000*`yield'
}

*** Tax yield per declaration (all declarations), from the mean created earlier in thousands
gen aMtp = 1000*m_is_tot_taxes_usd


* ---------
* Figure A3
* ---------

* Panel A: all declarations
* -------------------------
* Scatter of aMtp (average tax yield per declaration) against i_m_excess,
* with a dashed linear fit, on the regression sample with non-missing eis_f.
* The graph is kept in memory as Figure_A3a and exported as EPS and PDF.
scatter aMtp i_m_excess if sample_reg==1 & eis_f!=., ///
	msymbol(o) mcolor(gs8) msize(medsmall) mlwidth(vvthin) ///
	|| lfit aMtp i_m_excess if sample_reg==1 & eis_f!=. , ///
	lpattern(dash) lw(medium) clcolor(gs5) ///
	xlabel(, nogrid) ///
	ylabel(0(10000)40000, nogrid) ///
	yscale(range(0(10000)40000)) ///
	xtitle("Share with excess interaction", size(small) color(black)) ///
	ytitle("Average tax yield per declaration", size(small) color(black)) ///
	legend(pos(6) ring(10) col(2) label(1 "All") label(2 "All - fitted")) ///
	name(Figure_A3a, replace)
* Forward slashes keep the export paths valid on every platform
graph export "$figures/Figure_A3a.eps", as(eps) replace
graph export "$figures/Figure_A3a.pdf", as(pdf) replace

* Panel B: heterogeneity by potential tax yield
* ---------------------------------------------
* Two scatter series against i_m_excess, each with a dashed linear fit:
* aE20Mtp (high potential yield) and aEM20Mtp (low potential yield).
* The legend lists the two scatters first, then the two fitted lines.
* The graph is kept in memory as Figure_A3b and exported as EPS and PDF.
scatter aE20Mtp i_m_excess, ///
	msymbol(d) mcolor(gs7) msize(medsmall) mlwidth(vvthin) ///
	|| lfit aE20Mtp i_m_excess, ///
	lpattern(dash) lw(medium) clcolor(gs2) ///
	|| scatter aEM20Mtp i_m_excess, ///
	msymbol(s) mcolor(gs11) msize(medium) mlwidth(vvthin) ///
	|| lfit aEM20Mtp i_m_excess, ///
	lpattern(dash) lw(medium) clcolor(gs8) ///
	xlabel(, nogrid) ///
	ylabel(, nogrid) ///
	xtitle("Share with excess interaction", size(small) color(black)) ///
	ytitle("Average tax yield per declaration", size(small) color(black)) ///
	legend(pos(6) ring(10) col(2) label(1 "High potential yield") label(2 "High potential yield - fitted") label(3 "Low potential yield") label(4 "Low potential yield - fitted") order(1 3 2 4)) ///
	name(Figure_A3b, replace)
* Forward slashes keep the export paths valid on every platform
graph export "$figures/Figure_A3b.eps", as(eps) replace
graph export "$figures/Figure_A3b.pdf", as(pdf) replace


* -------------------------------- ENDS HERE -------------------------------- *